# US subnational

library(tidyverse)
library(fixest)
library(broom)
library(ggthemes)
library(openintro)

select <- dplyr::select
shift <- data.table::shift

#### Load in subnational policy outcomes from Trachtman ####
# https://doi.org/10.1016/j.enpol.2019.111214

trachtman <- read_excel("data/trachtman_ep_2020.xlsx") %>% 
  select(abb, year, rps_lead, ftg_lead, aceee_lead, sev_lead, unif_dem, unif_rep) %>% 
  # Outcome variables are already "lead" by one year, so reverse this
  arrange(abb, year) %>% 
  group_by(abb) %>% 
  mutate(rps = shift(rps_lead, type = "lag", n = 1),
         ftg = shift(ftg_lead, type = "lag", n = 1),
         aceee = shift(aceee_lead, type = "lag", n = 1),
         unif_dem_lag1 = shift(unif_dem, type = "lag", n = 1),
         unif_dem_lag2 = shift(unif_dem, type = "lag", n = 2),
         unif_dem_lag3 = shift(unif_dem, type = "lag", n = 3),
         unif_rep_lag1 = shift(unif_rep, type = "lag", n = 1),
         unif_rep_lag2 = shift(unif_rep, type = "lag", n = 2),
         unif_rep_lag3 = shift(unif_rep, type = "lag", n = 3)) %>% 
  select(abb, year, rps, ftg, aceee, starts_with("unif")) %>%
  as.data.frame()

#### Load in state-level temperature data ####

## Load in NOAA's average annual temperature data for US states
# Source: https://www.ncdc.noaa.gov/cag/statewide/mapping/110/tavg/201807/1

noaa_average_temp <- read_csv("data/noaa_average_temp.csv", 
                              skip = 3) %>% 
  select(state = Location, Date, temp = Value) %>% 
  separate(Date, into = c("year", "month"), sep = 4L) %>% 
  mutate_at(vars(year, month), ~as.numeric(.))
# head(noaa_average_temp)

## Load in NOAA's annual (cooling) degree days data
# Source: https://www.ncdc.noaa.gov/cag/statewide/mapping/110/cdd/201807/1/value

noaa_degree_days <- read_csv("data/noaa_cooling_degree_days.csv", 
                             skip = 3) %>% 
  select(state = Location, Date, degree_days = Value) %>% 
  separate(Date, into = c("year", "month"), sep = 4L) %>% 
  mutate_at(vars(year, month), ~as.numeric(.))
# head(noaa_degree_days)

## Merge temperature data

us_temp <- full_join(noaa_average_temp, noaa_degree_days, 
                     by = c("state", "year", "month")) %>% 
  mutate(abb = state2abbr(state)) %>% 
  # Convert to Celsius
  mutate(temp = round((temp - 32) * (5/9), 1),
         degree_days = round(degree_days * (5/9), 1)) %>% 
  # Tidy to measure annual average temp, degree days, and in hottest season (JJA)
  group_by(state, year) %>% 
  mutate(average_annual_temp = round(mean(temp, na.rm=T), 1), # Average temperature in a US state in a year
         total_annual_dd = sum(degree_days, na.rm=T)) %>% # Sum of cooling degree days in a US state in a year
  # Keep June, July, August
  filter(month %in% 6:8) %>% 
  group_by(state, year) %>% 
  mutate(average_jja_temp = round(mean(temp, na.rm=T), 1), # Average temperature in a US state in June-July-August in a year
         total_jja_dd = sum(degree_days, na.rm=T)) %>% # Sum of cooling degree days in a US state in June-July-August in a year (daily observed - 65F, continuous)
  ungroup() %>% 
  distinct(state, abb, year, average_annual_temp, total_annual_dd, average_jja_temp, total_jja_dd) %>% 
  # Scaled, otherwise the point estimate is basically zero
  mutate(total_jja_dd_scaled = scale(total_jja_dd)) %>% 
  select(state, abb, year, starts_with("average"), starts_with("total")) %>% 
  group_by(abb) %>% 
  mutate(average_annual_temp_lag1 = shift(average_annual_temp, 1),
         average_annual_temp_lag2 = shift(average_annual_temp, 2),
         average_annual_temp_lag3 = shift(average_annual_temp, 3),
         average_jja_temp_lag1 = shift(average_jja_temp, 1),
         average_jja_temp_lag2 = shift(average_jja_temp, 2),
         average_jja_temp_lag3 = shift(average_jja_temp, 3),
         total_jja_dd_lag1 = shift(total_jja_dd_scaled, 1),
         total_jja_dd_lag2 = shift(total_jja_dd_scaled, 2),
         total_jja_dd_lag3 = shift(total_jja_dd_scaled, 3)) %>% 
  select(-total_annual_dd) %>% 
  distinct() 
# head(us_temp)

# rm(noaa_average_temp, noaa_degree_days)

#### Merge datasets together ####

subnational_df <- full_join(us_temp, trachtman, by = c("abb", "year")) %>%
  arrange(abb, year) %>% 
  ungroup() %>% 
  filter(year>1999) %>% 
  # Scale outcomes
  mutate(scaled_rps = scale(.$rps),
         scaled_ftg = scale(.$ftg),
         scaled_aceee = scale(.$aceee)) %>% 
  mutate_at(vars(matches("scaled")), ~as.numeric(.))

rm(noaa_average_temp, noaa_degree_days, trachtman)

#### US spatial terms ####

# US contiguity

us_contig_raw <- read_csv("data/us_states_contiguity.csv")

us_contig_df <- us_contig_raw %>% 
  pivot_longer(names_to = "contiguous", values_to = "state2name", X1:X8) %>% 
  filter(!is.na(state2name)) %>% 
  mutate(contiguous = 1) %>% 
  mutate(state2abb = state2abbr(state2name),
         state1abb = state2abbr(`State name`)) %>% 
  select(state1abb, state2abb, contiguous)

states_list_analysis <- subnational_df %>% 
  filter(!is.na(average_annual_temp),
         year == 2015) %>% 
  select(abb) %>% distinct() %>% 
  as.vector()


# Unique, sorted states list
states_list <- cbind.data.frame(abb = unique(us_contig_df$state1abb))
states_list <- states_list %>% filter(abb %in% states_list_analysis$abb)
states_list <- sort(unique(states_list$abb))

# Make contiguity dyadic data
cont_df <- data.frame(state1abb = rep(states_list, times = length(states_list)),
                      state2abb = rep(states_list, each = length(states_list))) %>% 
  left_join(., us_contig_df, by = c("state1abb", "state2abb")) %>% 
  mutate(contiguous = ifelse(is.na(contiguous), 0, contiguous)) %>% 
  arrange(state1abb, state2abb)
# cont_df %>% .[1:10,]

# Row-standardize and make a matrix
cont_matrix <- cont_df %>% 
  pivot_wider(names_from = state2abb, values_from = contiguous) %>% 
  column_to_rownames("state1abb") %>% 
  as.data.frame() %>% 
  mutate(row_total = rowSums(across(where(is.numeric)))) %>% 
  mutate_at(vars(AL:WY), ~./row_total) %>% 
  select(-row_total) %>% 
  as.matrix()
diag(cont_matrix) <- 0

us_temp_demeaned <- us_temp %>% 
  ungroup() %>% 
  filter(year>=1990) %>% 
  # Annual average temperature
  mutate(resid_annual_lag1 = residuals(lm(average_annual_temp_lag1 ~ 
                                            as.factor(abb) + 
                                            as.factor(year),
                                          data = .))) %>% 
  mutate(resid_annual_lag2 = residuals(lm(average_annual_temp_lag2 ~
                                            as.factor(abb) + 
                                            as.factor(year),
                                          data = .))) %>% 
  mutate(resid_annual_lag3 = residuals(lm(average_annual_temp_lag3 ~ 
                                            as.factor(abb) + 
                                            as.factor(year),
                                          data = .))) %>% 
  # Hottest season average temperature
  mutate(resid_season_lag1 = residuals(lm(average_jja_temp_lag1 ~ 
                                            as.factor(abb) + 
                                            as.factor(year),
                                          data = .))) %>% 
  mutate(resid_season_lag2 = residuals(lm(average_jja_temp_lag2 ~ 
                                            as.factor(abb) + 
                                            as.factor(year),
                                          data = .))) %>% 
  mutate(resid_season_lag3 = residuals(lm(average_jja_temp_lag3 ~ 
                                            as.factor(abb) + 
                                            as.factor(year),
                                          data = .))) %>% 
  select(abb, year, starts_with("resid")) %>% 
  arrange(abb)
# head(us_temp_demeaned)

### Now make the spatial weights

# Make object for loops

# To hold loop outputs
spatial_weights_annual_lag1 <-  as.data.frame(matrix(NA, nrow = 48, ncol = 31))
spatial_weights_annual_lag2 <-  as.data.frame(matrix(NA, nrow = 48, ncol = 31))
spatial_weights_annual_lag3 <-  as.data.frame(matrix(NA, nrow = 48, ncol = 31))
spatial_weights_season_lag1 <-  as.data.frame(matrix(NA, nrow = 48, ncol = 31))
spatial_weights_season_lag2 <-  as.data.frame(matrix(NA, nrow = 48, ncol = 31))
spatial_weights_season_lag3 <-  as.data.frame(matrix(NA, nrow = 48, ncol = 31))

# List of years to filter on and loop through
year_list <- unique(us_temp_demeaned$year)

for (yearr in year_list) {
  residuals <- us_temp_demeaned %>% 
    filter(year == yearr) 
  
  # Annual, lag 1
  hold_spatial_weight <- cont_matrix %*% residuals$resid_annual_lag1
  spatial_weights_annual_lag1[,yearr-1989] <- hold_spatial_weight
  # Annual, lag 2
  hold_spatial_weight <- cont_matrix %*% residuals$resid_annual_lag2
  spatial_weights_annual_lag2[,yearr-1989] <- hold_spatial_weight
  # Annual, lag 3
  hold_spatial_weight <- cont_matrix %*% residuals$resid_annual_lag3
  spatial_weights_annual_lag3[,yearr-1989] <- hold_spatial_weight
  # Season, lag 1
  hold_spatial_weight <- cont_matrix %*% residuals$resid_season_lag1
  spatial_weights_season_lag1[,yearr-1989] <- hold_spatial_weight
  # Season, lag 2
  hold_spatial_weight <- cont_matrix %*% residuals$resid_season_lag2
  spatial_weights_season_lag2[,yearr-1989] <- hold_spatial_weight
  # Annual, lag 3
  hold_spatial_weight <- cont_matrix %*% residuals$resid_season_lag3
  spatial_weights_season_lag3[,yearr-1989] <- hold_spatial_weight
}

# Merge all the spatial weights together
spatial_weights_df <- full_join(spatial_weights_annual_lag1 %>% 
                                  pivot_longer(names_to = "names", values_to = "annual_W_lag1", V1:V31) %>% 
                                  mutate(abb = rep(states_list, times = length(year_list)),
                                         year = rep(year_list, each = length(states_list))) %>% 
                                  select(abb, year, 2),
                                spatial_weights_annual_lag2 %>% 
                                  pivot_longer(names_to = "names", values_to = "annual_W_lag2", V1:V31) %>% 
                                  mutate(abb = rep(states_list, times = length(year_list)),
                                         year = rep(year_list, each = length(states_list))) %>% 
                                  select(abb, year, 2),
                                by = c("abb", "year")) %>% 
  full_join(., spatial_weights_annual_lag3 %>% 
              pivot_longer(names_to = "names", values_to = "annual_W_lag3", V1:V31) %>% 
              mutate(abb = rep(states_list, times = length(year_list)),
                     year = rep(year_list, each = length(states_list))) %>% 
              select(abb, year, 2),
            by = c("abb", "year")) %>% 
  full_join(., spatial_weights_season_lag1 %>% 
              pivot_longer(names_to = "names", values_to = "season_W_lag1", V1:V31) %>% 
              mutate(abb = rep(states_list, times = length(year_list)),
                     year = rep(year_list, each = length(states_list))) %>% 
              select(abb, year, 2),
            by = c("abb", "year")) %>% 
  full_join(., spatial_weights_season_lag2 %>% 
              pivot_longer(names_to = "names", values_to = "season_W_lag2", V1:V31) %>% 
              mutate(abb = rep(states_list, times = length(year_list)),
                     year = rep(year_list, each = length(states_list))) %>% 
              select(abb, year, 2),
            by = c("abb", "year")) %>% 
  full_join(., spatial_weights_season_lag3 %>% 
              pivot_longer(names_to = "names", values_to = "season_W_lag3", V1:V31) %>% 
              mutate(abb = rep(states_list, times = length(year_list)),
                     year = rep(year_list, each = length(states_list))) %>% 
              select(abb, year, 2),
            by = c("abb", "year")) 

subnational_df <- full_join(subnational_df, spatial_weights_df, by = c("abb", "year"))

#### US subnational analysis #### 

## State-specific trends

m_us_annual1 <- feols(c(scaled_rps, scaled_ftg, scaled_aceee)
                          ~ average_annual_temp_lag1 + annual_W_lag1 |
                            abb[year] + year, # trends
                          data = subnational_df) %>% 
  as.list()

m_us_season1 <- feols(c(scaled_rps, scaled_ftg, scaled_aceee)
                      ~ average_jja_temp_lag1 + season_W_lag1 |
                        abb[year] + year, # trends
                      data = subnational_df) %>% 
  as.list()

m_us_annual2 <- feols(c(scaled_rps, scaled_ftg, scaled_aceee)
                      ~ average_annual_temp_lag2 + annual_W_lag2 |
                        abb[year] + year, # trends
                      data = subnational_df) %>% 
  as.list()

m_us_season2 <- feols(c(scaled_rps, scaled_ftg, scaled_aceee)
                      ~ average_jja_temp_lag2 + season_W_lag2 |
                        abb[year] + year, # trends
                      data = subnational_df) %>% 
  as.list()

m_us_annual3 <- feols(c(scaled_rps, scaled_ftg, scaled_aceee)
                      ~ average_annual_temp_lag3 + annual_W_lag3 |
                        abb[year] + year, # trends
                      data = subnational_df) %>% 
  as.list()

m_us_season3 <- feols(c(scaled_rps, scaled_ftg, scaled_aceee)
                      ~ average_jja_temp_lag3 + season_W_lag3 |
                        abb[year] + year, # trends
                      data = subnational_df) %>% 
  as.list()

list_us_trends <- do.call(c, list(m_us_annual1, m_us_annual2, m_us_annual3,
                             m_us_season1, m_us_season2, m_us_season3))


results_us_trends <-  setNames(as.data.frame(matrix(NA, nrow = 18, ncol = 10)), 
                               c("outcome", "treatment", "beta", "std_error",
                                 "t_stat", "p_value", "nobs", "n_countries",
                                 "min_year", "max_year"))

for (i in 1:18){
  results_us_trends[i, 1] <- list_us_trends[[i]]$fml %>% gsub("\\~", "", .) %>% .[2] # outcome
  results_us_trends[i, 2] <- list_us_trends[[i]]$fml %>% gsub("\\~", "", .) %>% .[3] # treatment
  results_us_trends[i, 3] <- list_us_trends[[i]] %>% tidy() %>% filter(!grepl("_W_",term)) %>% select(2) # beta
  results_us_trends[i, 4] <- list_us_trends[[i]] %>% tidy() %>% filter(!grepl("_W_",term)) %>% select(3) # std error
  results_us_trends[i, 5] <- list_us_trends[[i]] %>% tidy() %>% filter(!grepl("_W_",term)) %>% select(4) # t-stat
  results_us_trends[i, 6] <- list_us_trends[[i]] %>% tidy() %>% filter(!grepl("_W_",term)) %>% select(5) # p-val
  results_us_trends[i, 7] <- list_us_trends[[i]]$nobs 
  results_us_trends[i, 8] <- length(unique(list_us_trends[[i]]$fixef_id$abb))
  results_us_trends[i, 9] <- min(attr(list_us_trends[[i]]$fixef_id$year, "fixef_names"))
  results_us_trends[i, 10] <- max(attr(list_us_trends[[i]]$fixef_id$year, "fixef_names"))
}
results_us_trends$treatment <- str_sub(results_us_trends$treatment, start = 1L, end = -17L)

results_us_trends

us_labels <- results_us_trends %>% 
  mutate(labels = case_when(outcome=="scaled_rps" ~ "Renewable \nportfolio \nstandard",
                            outcome=="scaled_ftg" ~ "Distributed \ngeneration",
                            outcome=="scaled_aceee" ~ "Energy \nefficiency")) %>% 
  select(labels)

# Plot 
results_us_trends %>%
  # Tidy labels
  mutate(measure = case_when(str_detect(treatment, "average_annual")==TRUE ~ "Annual",
                             str_detect(treatment, "average_jja")==TRUE ~ "Summer",
                             str_detect(treatment, "total_jja_dd")==TRUE ~ "Degree days"),
         Facet = measure,
         Facet = factor(Facet, levels = c("Annual", "Summer", "Degree days")),
         Temperature = case_when(str_detect(treatment, "lag1")==TRUE ~ "One-year lag", 
                                 str_detect(treatment, "lag2")==TRUE ~ "Two-year lag", 
                                 str_detect(treatment, "lag3")==TRUE ~ "Three-year lag")) %>% 
  # Create an index for plotting
  group_by(Temperature, measure) %>% 
  mutate(n = row_number()) %>% 
  ungroup() %>% 
  group_by(outcome, measure) %>% 
  mutate(m = row_number()) %>% 
  ungroup() %>% 
  mutate(index = ifelse(m == 1, n - 0.2,
                        ifelse(m == 3, n + 0.2, n))) %>% 
  # critical threshold: 0.05 -> 0.0167; pnorm(2.39) with two-sided
  mutate(ci_lower = beta - 1.96*std_error,
         ci_lower_correction = beta - 2.39*std_error,
         ci_upper = beta + 1.96*std_error,
         ci_upper_correction = beta + 2.39*std_error) %>% 
  # Plot
  ggplot(., aes(beta, index, shape = Temperature)) +
  facet_grid(cols = vars(Facet), scales = "fixed") +
  geom_segment(aes(x = ci_lower_correction, xend = ci_upper_correction,
                   y = index, yend = index),
               size = 1, color = "#79b321") +
  geom_segment(aes(x = ci_lower, xend = ci_upper, 
                   y = index, yend = index), 
               size = 2, color = "#456613") +
  geom_point(size = 3.5, aes(shape = Temperature)) +
  scale_shape_manual(breaks=c("One-year lag","Two-year lag","Three-year lag"),
                     values = c(16,3,4)) +
  scale_color_manual(values = c("black", "black", "black")) +
  geom_vline(xintercept = 0, linetype = 1, colour = "black") +
  scale_y_continuous(trans = "reverse",
                     breaks = c(1:3),
                     labels = unique(us_labels$labels)) +
  labs(x="Regression coefficient and confidence intervals", 
       y="") +
  geom_vline(xintercept = 0) +
  theme_tufte(base_family = "Helvetica") +
  theme(legend.position = "bottom",
        panel.background = element_rect(fill=NA),
        panel.grid.major.x = element_line(colour = "grey90"),
        # panel.grid.minor.x = element_line(colour = "grey90"),
        # panel.grid.major.y = element_line(colour = "grey90"),
        panel.ontop = FALSE,
        text=element_text(size=16))
ggsave("text/coefplot_us_trends.pdf", units = "in", height= 6, width = 10)


